{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Accelerating GFile with prefetching"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Import TensorFlow and `gfile`, and make sure the TensorFlow build is `PAI1911` or later."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "uuid": "db91ee55-6e89-4ab9-a8b5-5e66bbf8414a"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1.12.2-PAI1911'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "from tensorflow.python.platform import gfile\n",
    "\n",
    "tf.__version__"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We use OSS as an example, so let's do some preparation first.\n",
    "\n",
    "1. Import `tensorflow_io.oss`, which is built into DSW.\n",
    "2. Set up your OSS configuration."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "uuid": "0491d81a-cb5e-4616-94de-c7a9a1b88401"
   },
   "outputs": [],
   "source": [
    "import tensorflow_io.oss\n",
    "\n",
    "ACCESS_ID=\"<your access id>\"\n",
    "ACCESS_KEY=\"<your access key>\"\n",
    "HOST = \"<your host>\"\n",
    "BUCKET=\"<your bucket>\"\n",
    "OSS_BUCKET_ROOT=\"{}\\x01id={}\\x02key={}\\x02host={}/\".format(BUCKET, ACCESS_ID, ACCESS_KEY, HOST)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally, let's set up a `download` function and run a benchmark."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "uuid": "5ef66f03-06e6-45bd-b359-70b252835c80"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time (s) elapsed: 66.92 s, threads: 0, speed: 8.02 MB/s\n",
      "time (s) elapsed: 25.68 s, threads: 4, speed: 20.90 MB/s\n"
     ]
    }
   ],
   "source": [
    "from time import time\n",
    "\n",
    "def download(file, prefetch_threads=0):\n",
    "    \"\"\"Read `file` from the OSS bucket in 1 MB chunks and print the throughput.\n",
    "\n",
    "    Args:\n",
    "        file: object path, appended to `OSS_BUCKET_ROOT`.\n",
    "        prefetch_threads: forwarded to `GFile` as `prefetch_threads`; 0 by default.\n",
    "    \"\"\"\n",
    "    started_at = time()\n",
    "    chunk_size = 1024 * 1024\n",
    "    bytes_read = 0\n",
    "    oss_path = OSS_BUCKET_ROOT + file\n",
    "\n",
    "    # `prefetch_threads` is the extra GFile argument this notebook demonstrates.\n",
    "    with tf.gfile.GFile(oss_path, mode=\"rb\", prefetch_threads=prefetch_threads) as reader:\n",
    "        last_len = chunk_size\n",
    "        # A chunk shorter than `chunk_size` ends the loop.\n",
    "        while last_len == chunk_size:\n",
    "            last_len = len(reader.read(chunk_size))\n",
    "            bytes_read += last_len\n",
    "\n",
    "    elapsed_sec = time() - started_at\n",
    "    megabytes = float(bytes_read) / chunk_size\n",
    "    speed = megabytes / elapsed_sec\n",
    "    report = \"time (s) elapsed: %.2f s, threads: %d, speed: %.2f MB/s\" % (\n",
    "        elapsed_sec, prefetch_threads, speed)\n",
    "    print(report)\n",
    "\n",
    "file = \"<your file to download>\"\n",
    "\n",
    "# Same file twice: first without prefetching (0 threads), then with 4 prefetch threads.\n",
    "for thread_count in (0, 4):\n",
    "    download(file, prefetch_threads=thread_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}